########################################################################
# DoFinalProcessing.R
# This file does some final pre-processing to prepare the files for analysis.
# It also writes the final analysis files to the ../data folder for replication.
#
# Please address any questions about this process to Ryan Jablonski, r.s.jablonski@lse.ac.uk
#
########################################################################

library(data.table)
library(digest)
library(dplyr)
library(plyr)

# ---- Load inputs ----
# Every path is relative to the working directory and written with forward
# slashes. R resolves forward slashes on Windows, macOS and Linux, whereas
# backslash-separated paths only resolve on Windows.

# school-specific data
schools.lc <- read.csv("./output/Schools.forLC.withdistances_updated.csv")
schools.mp <- read.csv("./output/Schools.forMP.withdistances_updated.csv")

# election results by office
lc.elec <- read.csv("./input/lc_results.csv", stringsAsFactors = FALSE)
mp.elec <- read.csv("./input/mp_results.csv", stringsAsFactors = FALSE)

# councilor follow-up survey (written unchanged to the data folder at the end of the script)
followupsurvey <- read.csv("../Original Survey Files/CouncilorFollowUpSurvey.csv")

# survey data
mp.survey <- read.csv("./output/mp_withcovariates.csv", stringsAsFactors = FALSE)
c.survey <- read.csv("./output/c_withcovariates.csv", stringsAsFactors = FALSE)
all.surveys <- read.csv("./output/all.surveys.withgoogledistance.csv", stringsAsFactors = FALSE)

# survey data including attrited respondents
c.survey.all <- read.csv("./output/c_all_withcovariates.csv", stringsAsFactors = FALSE)
mp.survey.all <- read.csv("./output/mp_all_withcovariates.csv", stringsAsFactors = FALSE)

# quiz data
quiz.df <- read.csv("./output/quiz_with_covariates.csv")


# Correct the coding of the information_aid and transparency treatment
# indicators in the three survey data frames.

# Rows whose treatment is "K" are recoded to information_aid = 1; all other
# rows keep their existing value.
all.surveys$information_aid <- ifelse(all.surveys$treatment == "K", 1, all.surveys$information_aid)
c.survey$information_aid <- ifelse(c.survey$treatment == "K", 1, c.survey$information_aid)
mp.survey$information_aid <- ifelse(mp.survey$treatment == "K", 1, mp.survey$information_aid)

# Values of transparency_condition that switch each transparency dummy on.
transparency.all.conditions <- c("both", "donor_audit", "donor_audit + radio", "radio", "radio + donor_audit")
transparency.radio.conditions <- c("both", "donor_audit + radio", "radio", "radio + donor_audit")
transparency.donor.conditions <- c("both", "donor_audit + radio", "radio + donor_audit", "donor_audit")

# Pooled surveys
all.surveys$transparency_all <- ifelse(all.surveys$transparency_condition %in% transparency.all.conditions, 1, 0)
all.surveys$transparency_radio <- ifelse(all.surveys$transparency_condition %in% transparency.radio.conditions, 1, 0)
all.surveys$transparency_donor <- ifelse(all.surveys$transparency_condition %in% transparency.donor.conditions, 1, 0)

# Councilor survey
c.survey$transparency_all <- ifelse(c.survey$transparency_condition %in% transparency.all.conditions, 1, 0)
c.survey$transparency_radio <- ifelse(c.survey$transparency_condition %in% transparency.radio.conditions, 1, 0)
c.survey$transparency_donor <- ifelse(c.survey$transparency_condition %in% transparency.donor.conditions, 1, 0)

# MP survey
mp.survey$transparency_all <- ifelse(mp.survey$transparency_condition %in% transparency.all.conditions, 1, 0)
mp.survey$transparency_radio <- ifelse(mp.survey$transparency_condition %in% transparency.radio.conditions, 1, 0)
mp.survey$transparency_donor <- ifelse(mp.survey$transparency_condition %in% transparency.donor.conditions, 1, 0)

# ---- Merge election results onto the school files for the in-sample tests ----

# Local council results: look each school's ward up once, then pull every
# election variable through the same row index (NA where no ward matches).
lc.elec.row <- match(schools.lc$ps_ward_id, lc.elec$WardId)
schools.lc$total_turnout <- lc.elec[lc.elec.row, "Turnout"]
schools.lc$total_regvoters <- lc.elec[lc.elec.row, "RegVoters"]
schools.lc$total_percent <- lc.elec[lc.elec.row, "IncumbentPercent"]
schools.lc$total_vm <- lc.elec[lc.elec.row, "IncumbentVictoryMargin"]

# Parliamentary results: rows ranked 1 and rows ranked 2 in each constituency.
mp.elec_winner <- mp.elec[mp.elec$rank == 1, ]
mp.elec_notwinner <- mp.elec[mp.elec$rank == 2, ]

winner.row <- match(schools.mp$constituencyid, mp.elec_winner$constituency)
notwinner.row <- match(schools.mp$constituencyid, mp.elec_notwinner$constituency)

schools.mp$total_turnout <- mp.elec_winner[winner.row, "total"]
# total_regvoters is not merged for the MP file:
# schools.mp$total_regvoters = mp.elec_winner[match(schools.mp$constituencyid, mp.elec_winner$constituency),"total"]
schools.mp$total_percent <- mp.elec_winner[winner.row, "vote_share"]
schools.mp$total_percent_opp <- mp.elec_notwinner[notwinner.row, "vote_share"]
# Victory margin = rank-1 vote share minus rank-2 vote share.
schools.mp$total_vm <- schools.mp$total_percent - schools.mp$total_percent_opp

# Party dummies: column name -> value of winner_party_local that sets it to 1.
# NOTE(review): the MP school file is also coded from winner_party_local
# rather than a parliamentary-winner column -- confirm this is intended.
party.codes <- c(DPP = "DPP", MCP = "MCP", PP = "PP", Independent = "Ind", UDF = "UDF")
for (party.column in names(party.codes)) {
  schools.lc[[party.column]] <- ifelse(schools.lc$winner_party_local == party.codes[[party.column]], 1, 0)
  schools.mp[[party.column]] <- ifelse(schools.mp$winner_party_local == party.codes[[party.column]], 1, 0)
}

#schools.lc=select_if(schools.lc, is.numeric)
#schools.mp=select_if(schools.lc, is.numeric)

# A school is in sample when its ward (councilors, mp == 0) or its
# constituency (MPs, mp == 1) appears in the pooled survey data.
schools.lc$insample <- ifelse(schools.lc$ps_ward_id %in% all.surveys[all.surveys$mp == 0, ]$ps_ward_id, 1, 0)
schools.mp$insample <- ifelse(schools.mp$constituencyid %in% all.surveys[all.surveys$mp == 1, ]$constituencyid, 1, 0)

# Keep only the analysis variables, in this column order (councilor file).
vars.to.keep <- c(
  "ps_ward_id", "school_home_distance", "constituencyid", "insample",
  "school_enrollment_total", "school_num_teachers", "student_to_teacher_ratio",
  "past_aid_project", "num_schools_in_ward_tot",
  "total_turnout", "total_vm", "total_regvoters", "total_percent",
  "DPP", "UDF", "MCP", "PP", "Independent", "pop_per_hectacre"
)
schools.lc <- schools.lc[, vars.to.keep]

# Same for the MP file (no total_regvoters; constituency-level school count).
vars.to.keep <- c(
  "ps_ward_id", "school_home_distance", "constituencyid", "insample",
  "school_enrollment_total", "school_num_teachers", "student_to_teacher_ratio",
  "past_aid_project", "num_schools_in_constituency_tot",
  "DPP", "UDF", "MCP", "PP", "Independent", "pop_per_hectacre",
  "total_turnout", "total_vm", "total_percent"
)
schools.mp <- schools.mp[, vars.to.keep]



##### SETUP QUIZ DATA ########
# Remove the index columns and the school coordinates from the quiz file;
# names that are not present are simply ignored.
quiz.drop.cols <- c("X.1", "X", "school_latitude", "school_longitude")
quiz.df <- quiz.df[setdiff(names(quiz.df), quiz.drop.cols)]

# any_aid_treat: the maximum of information_aid over all rows that share a
# resp_id, copied back onto every row of that respondent.
# MP survey
all.surveys.ag <- aggregate(mp.survey$information_aid, by = list(mp.survey$resp_id), FUN = max)
mp.survey$any_aid_treat <- all.surveys.ag$x[match(mp.survey$resp_id, all.surveys.ag$Group.1)]

# Councilor survey
all.surveys.ag <- aggregate(c.survey$information_aid, by = list(c.survey$resp_id), FUN = max)
c.survey$any_aid_treat <- all.surveys.ag$x[match(c.survey$resp_id, all.surveys.ag$Group.1)]


# Write the school-level files to the replication data folder.
# Paths use forward slashes so they resolve on every platform, not only Windows.
write.csv(schools.lc, "../data/lc_schools_all.csv")
write.csv(schools.mp, "../data/mp_schools_all.csv")


#######SETUP ORIGINAL SURVEY FILES AND ANONYMIZE##########
library(readstata13)
usefuldata <- read.csv("./input/categorization_of_how_useful_question.csv")
lcs <- read.dta13("../Original Survey Files/AID_MALAWI_COUNCILOR_withids.dta")
mps <- read.dta13("../Original Survey Files/AID_MALAWI_MP_withids.dta")
# Stack the councilor and MP files; rbind.fill (plyr) fills columns that exist
# in only one of the two files with NA.
all <- rbind.fill(lcs, mps)

# Attach each categorised response's aid_63 value, matched on the instance ID.
usefuldata$all.aid_63 <- all[match(usefuldata$all.instanceID, all$instanceID), "aid_63"]

write.csv(usefuldata, "../data/how_useful_survey_questions.csv")



#######FIX COMPLIANCE ISSUES and CREATE VARIABLES##########

# Free-text answers that are coded as 0 for each compliance question.
# The same lists are applied to the pooled, MP and councilor surveys.

# aid_72
otherschool.none <- c("no", "none", "nobe", "None", "", "he did not mention any", "bone", ".", "0", 0, "o", "np")

# aid_73
wrongschool.none <- c(
  "no", "none", "nobe", "None", "Not necessarily but there were some schools that he would have", "non", "n", "k", "", "he did not mention any", "bone", ".", "0", 0, "o", "np",
  "Ntsanyale is not on the list but it requires school blocks. it is in need of school kit",
  "he insisted to chose a school which was not on the map but l explained to him that he is supposed to concentrate and select school which are on the map",
  "changamire primary school should be given the materials because most of the schoolgoing children go to fish in the lake an or go South Africa so in order to encourage children to go to school",
  "the MP was very friendly and he expressed",
  "solar lumps can fit on map numbtwo dictionaries can go to MP number one and the school should be chiunjiza school kit schools go to chankhandwe"
)

# aid_72 comment that forces wrongschool to 1 regardless of aid_73
wrongschool.override <- c("one on the map school C is not in his ward.")

# aid_74
othermaterials.none <- c("He just stated that he has distributed the materials evenly to his constituency", "no", "none", "nobe", "not", "None", "Not necessarily but there were some schools that he would have", "non", "n", "k", "", "he did not mention any", "bone", ".", "0", 0, "o", "np")

# These are issues with maps and understanding:
# 0 when aid_22 is "School B", 1 for any other answer.
all.surveys$notunderstandmap <- ifelse(all.surveys$aid_22 %in% c("School B"), 0, 1)
mp.survey$notunderstandmap <- ifelse(mp.survey$aid_22 %in% c("School B"), 0, 1)
c.survey$notunderstandmap <- ifelse(c.survey$aid_22 %in% c("School B"), 0, 1)

# 1 when aid_72 holds anything other than one of the "none" answers.
all.surveys$otherschool <- ifelse(all.surveys$aid_72 %in% otherschool.none, 0, 1)
mp.survey$otherschool <- ifelse(mp.survey$aid_72 %in% otherschool.none, 0, 1)
c.survey$otherschool <- ifelse(c.survey$aid_72 %in% otherschool.none, 0, 1)

# respondent said the school was not in constituency
all.surveys$wrongschool <- ifelse(all.surveys$aid_73 %in% wrongschool.none, 0, 1)
mp.survey$wrongschool <- ifelse(mp.survey$aid_73 %in% wrongschool.none, 0, 1)
c.survey$wrongschool <- ifelse(c.survey$aid_73 %in% wrongschool.none, 0, 1)

# ... plus the one case that was recorded under aid_72 instead
all.surveys$wrongschool <- ifelse(all.surveys$aid_72 %in% wrongschool.override, 1, all.surveys$wrongschool)
mp.survey$wrongschool <- ifelse(mp.survey$aid_72 %in% wrongschool.override, 1, mp.survey$wrongschool)
c.survey$wrongschool <- ifelse(c.survey$aid_72 %in% wrongschool.override, 1, c.survey$wrongschool)

# 1 when aid_74 holds anything other than one of the "none" answers.
all.surveys$othermaterials <- ifelse(all.surveys$aid_74 %in% othermaterials.none, 0, 1)
mp.survey$othermaterials <- ifelse(mp.survey$aid_74 %in% othermaterials.none, 0, 1)
c.survey$othermaterials <- ifelse(c.survey$aid_74 %in% othermaterials.none, 0, 1)

# sort by respondent, then map order, then school
all.surveys <- all.surveys[order(all.surveys$resp_id, all.surveys$map_order_id, all.surveys$school_id), ]


# ---- Manual school-level flags (rows with lc == 0 only) ----
all.surveys$schoolissue <- FALSE
# map failed to load
all.surveys$schoolissue <- ifelse(all.surveys$lc == 0 & all.surveys$constituencyid == 152 & all.surveys$map_order_id == 3, TRUE, all.surveys$schoolissue)
# says not in constituency
all.surveys$schoolissue <- ifelse(all.surveys$lc == 0 & all.surveys$constituencyid == 139 & all.surveys$school_name == "MITONDO SCHOOL", TRUE, all.surveys$schoolissue)
# says not in constituency
all.surveys$schoolissue <- ifelse(all.surveys$lc == 0 & all.surveys$school_id == 2469, TRUE, all.surveys$schoolissue)
# says not in constituency
all.surveys$schoolissue <- ifelse(all.surveys$lc == 0 & all.surveys$school_id == 4208, TRUE, all.surveys$schoolissue)
xtabs(~all.surveys$schoolissue)

# this is the list of schools where respondents said there was a problem (manually coded based on aid_74)
wrongschools <- read.csv("./input/coding_of_enumerator_comments_about_schools.csv")

# Row of the coding file for each survey row, keyed on school_id + resp_id
# (NA when the school/respondent pair was not coded).
wrongschools.row <- match(
  paste0(all.surveys$school_id, ".", all.surveys$resp_id),
  paste0(wrongschools$school_id, ".", wrongschools$resp_id)
)

all.surveys$schoolissue_temp <- wrongschools[wrongschools.row, "wrongschool"]

# where the respondent had an issue with the knowledge map (uncoded pairs count as 0)
all.surveys$knowledgemapproblem <- wrongschools[wrongschools.row, "knowledgemapproblem"]
all.surveys$knowledgemapproblem <- ifelse(is.na(all.surveys$knowledgemapproblem), 0, all.surveys$knowledgemapproblem)

# Add the coded problems to the manual flags. `%in% 1` is used instead of
# `== 1` on purpose: schoolissue_temp is NA for every pair missing from the
# coding file, and ifelse() returns NA for an NA test. With `==` those NAs
# were then recoded to 0 on the next line, erasing the manual flags set above.
all.surveys$schoolissue <- ifelse(all.surveys$schoolissue_temp %in% 1, 1, all.surveys$schoolissue)
# Flags left NA by an NA condition in the manual rules above count as "no issue".
all.surveys$schoolissue <- ifelse(is.na(all.surveys$schoolissue), 0, all.surveys$schoolissue)

# A map is flagged when any school shown on it is flagged.
schoolissuesbymap <- aggregate(all.surveys$schoolissue, list(all.surveys$map_id), max)
all.surveys$mapwithwrongschool <- schoolissuesbymap[match(all.surveys$map_id, schoolissuesbymap$Group.1), "x"]
xtabs(~all.surveys$mapwithwrongschool)

# merge back with other surveys
# NOTE(review): rows are matched on school_id alone and match() returns the
# first hit, so a school that appears in several rows takes the value of its
# first row -- confirm that is intended.
temp <- all.surveys[all.surveys$lc == 1, ]
c.survey$mapwithwrongschool <- temp[match(c.survey$school_id, temp$school_id), "mapwithwrongschool"]
c.survey$knowledgemapproblem <- temp[match(c.survey$school_id, temp$school_id), "knowledgemapproblem"]
temp <- all.surveys[all.surveys$lc == 0, ]
mp.survey$mapwithwrongschool <- temp[match(mp.survey$school_id, temp$school_id), "mapwithwrongschool"]
mp.survey$knowledgemapproblem <- temp[match(mp.survey$school_id, temp$school_id), "knowledgemapproblem"]


####SETUP SOME ADDITIONAL VARIABLES#######

# Home-to-school distance in km: negative distances and rows whose village
# was coded from a centroid are set to NA.
home.km <- ifelse(all.surveys$km_to_home_best < 0, NA, all.surveys$km_to_home_best)
home.km <- ifelse(all.surveys$village_coding_source %in% c("centroid"), NA, home.km)

# Analysis variable is log(km + 1); the unlogged value is kept alongside it.
all.surveys$school_home_distance <- log(home.km + 1)
all.surveys$school_home_distance_notlog <- home.km
all.surveys$km_to_home_best <- NULL


# merge distance z scores
all.surveys$z_school_home_distance <- NA

is.mp.row <- all.surveys$mp == 1
is.lc.row <- all.surveys$mp == 0

# Copy the logged distance onto the MP and councilor files, matching on
# school_id within the rows of the same office.
mp.source <- all.surveys[is.mp.row, ]
mp.survey$school_home_distance <- mp.source$school_home_distance[match(mp.survey$school_id, mp.source$school_id)]
lc.source <- all.surveys[is.lc.row, ]
c.survey$school_home_distance <- lc.source$school_home_distance[match(c.survey$school_id, lc.source$school_id)]

# Standardise within the MP file, then copy the z score back to the pooled file.
mp.survey$m_school_home_distance <- mean(mp.survey$school_home_distance, na.rm = TRUE)
mp.survey$s_school_home_distance <- sd(mp.survey$school_home_distance, na.rm = TRUE)
mp.survey$z_school_home_distance <- (mp.survey$school_home_distance - mp.survey$m_school_home_distance) / mp.survey$s_school_home_distance
all.surveys[is.mp.row, "z_school_home_distance"] <- mp.survey[match(all.surveys[is.mp.row, "school_id"], mp.survey$school_id), "z_school_home_distance"]

# Same for the councilor file.
c.survey$m_school_home_distance <- mean(c.survey$school_home_distance, na.rm = TRUE)
c.survey$s_school_home_distance <- sd(c.survey$school_home_distance, na.rm = TRUE)
c.survey$z_school_home_distance <- (c.survey$school_home_distance - c.survey$m_school_home_distance) / c.survey$s_school_home_distance
all.surveys[is.lc.row, "z_school_home_distance"] <- c.survey[match(all.surveys[is.lc.row, "school_id"], c.survey$school_id), "z_school_home_distance"]


# run_again: 1 when either aid_56 or aid_57 is "Yes".
all.surveys$run_again <- ifelse(all.surveys$aid_56 == "Yes" | all.surveys$aid_57 == "Yes", 1, 0)
c.survey$run_again <- ifelse(c.survey$aid_56 == "Yes" | c.survey$aid_57 == "Yes", 1, 0)
mp.survey$run_again <- ifelse(mp.survey$aid_56 == "Yes" | mp.survey$aid_57 == "Yes", 1, 0)


# Standardise population density (mean 0, sd 1) over the pooled file.
density.mean <- mean(all.surveys$pop_per_hectacre, na.rm = TRUE)
density.sd <- sd(all.surveys$pop_per_hectacre, na.rm = TRUE)
all.surveys$pop_per_hectacre <- (all.surveys$pop_per_hectacre - density.mean) / density.sd

# Donor knowledge: values of test_donor_specific below 1 are floored to 0,
# then averaged with test_most_projects.
all.surveys$test_donor_specific <- ifelse(all.surveys$test_donor_specific < 1, 0, all.surveys$test_donor_specific)
all.surveys$donor_knowledge <- (all.surveys$test_donor_specific + all.surveys$test_most_projects) / 2

# Keep a copy of aid_8 under a descriptive name.
all.surveys$survey_timeliving <- all.surveys$aid_8

# ---- Derive the last variables, then strip raw and intermediate columns ----
# The same steps are applied to each of the five survey data frames. Derived
# variables are built first because they read the raw aid_* answers, which are
# removed afterwards.

# Answer labels coded as 0 for the Tearfund questions. The "Don't know" label
# contains a curly apostrophe that has been mangled by encoding round-trips,
# so the properly encoded form and the common mojibake forms are all accepted.
# The first "Don..." entry is the literal this script used originally.
tearfund.no.labels <- c(
  "No",
  "Don?<U+0080><U+0099>t know",
  "Don\u2019t know",
  "Don't know",
  "Don\u00e2\u0080\u0099t know",
  "Don\u00e2\u20ac\u2122t know"
)

# Columns removed by exact name (names that are absent are ignored).
drop.exact <- c(
  "school_constituency.x", "school_constituency.y", "school_constituencyx",
  "school_constituencyy", "school_constituency",
  "X.5", "X.4", "X.3", "v1", "x", "xx",
  "constituencybyelectionx", "ps_constituencyx", "num_schools_in_constituencyx",
  "map_districtx", "ps_districtx", "fraud_conditions", "fraudblock", "xy", "x2",
  "school_latitude", "school_longitude", "school_district", "censusschoolcode",
  "ps_constituency.x", "ps_constituency.y", "ps_constituencyy",
  "ps_district.x", "ps_district.y", "ps_districty",
  "upper_map_ward", "upper_map_district", "wardstring", "map_districty",
  "wardname", "ps_wardx", "school_wardx", "wardbyelectionx", "var1", "x1",
  "ps_constituency", "ps_wardy", "school_wardy",
  "wardbyelectiony", "constituencybyelection", "wp_ward", "wp_constituen",
  "wp_sum", "map_district.x", "map_district.y", "map_h", "map_order", "mapissue",
  "school_count", "school_type", "psdistance", "winner_name_parliamentary",
  "instancename", "winner_name_local", "winner_name_local2",
  "winner_name_parliamentary2", "school_ward", "mecissue",
  "constituencybyelectiony", "merged_from_survey", "version", "instanceid",
  "staff_lb", "rta_form_id", "instancehi_origi", "instancehi",
  "wp_constituency", "wp_area", "wp_grid_cell_count", "wp_mean",
  "Fraud_Conditions", "dup", "ward_incumbent_percent", "const_incumbent_percent",
  "winner_party_parliamentary", "constituencybyelection.x",
  "num_schools_in_constituency.x", "X", "X.2", "X.x", "X.y", "X.1"
)

# Columns removed when their name matches any of these regular expressions.
# Patterns without "^" match anywhere in the name.
drop.patterns <- c(
  "^aid_", "^min_", "^max_", "^median_", "^X", "^submission", "^user",
  "^selected", "^home", "lat", "long", "geo", "village", "^temp_", "^votes",
  "age", "count", "turnout", "ward_victory_margin", "const_victory_margin",
  "voters", "incumbent_party", "row", "name", "mean", "median", "sd",
  "^form", "km"
)

for (survey.name in c("mp.survey", "c.survey", "all.surveys", "mp.survey.all", "c.survey.all")) {
  # Fetch the data frame by name (replaces eval(parse(text = ...))).
  this.survey <- get(survey.name)

  # Tearfund contact: 1 = "Yes", 0 = "No"/"Don't know", NA otherwise.
  this.survey$workedtearfund <- ifelse(this.survey$aid_60 == "Yes", 1, NA)
  this.survey$workedtearfund <- ifelse(this.survey$aid_60 %in% tearfund.no.labels, 0, this.survey$workedtearfund)

  this.survey$heardtearfund <- ifelse(this.survey$aid_59 == "Yes", 1, NA)
  this.survey$heardtearfund <- ifelse(this.survey$aid_59 %in% tearfund.no.labels, 0, this.survey$heardtearfund)

  # compliance
  this.survey$notunderstandmap <- ifelse(this.survey$aid_22 %in% c("School B"), 0, 1)

  this.survey$otherschool <- ifelse(this.survey$aid_72 %in% c("no", "none", "nobe", "None", "", "he did not mention any", "bone", ".", "0", 0, "o", "np"), 0, 1)

  # NOTE(review): this overwrites the wrongschool values computed earlier in
  # the script for mp.survey, c.survey and all.surveys. The earlier coding used
  # a longer list of aid_73 answers plus an aid_72 override; this one uses only
  # the short list. Confirm which coding the analysis is meant to use.
  this.survey$wrongschool <- ifelse(this.survey$aid_73 %in% c("no", "none", "nobe", "None", "Not necessarily but there were some schools that he would have", "non", "n", "k", "", "he did not mention any", "bone", ".", "0", 0, "o", "np"), 0, 1)

  this.survey$othermaterials <- ifelse(this.survey$aid_74 %in% c("He just stated that he has distributed the materials evenly to his constituency", "no", "none", "nobe", "not", "None", "Not necessarily but there were some schools that he would have", "non", "n", "k", "", "he did not mention any", "bone", ".", "0", 0, "o", "np"), 0, 1)

  # Drop every column that is listed by name or matches one of the patterns.
  column.names <- colnames(this.survey)
  matches.pattern <- Reduce(
    `|`,
    lapply(drop.patterns, grepl, x = column.names),
    logical(length(column.names))
  )
  drop.column <- column.names %in% drop.exact | matches.pattern
  this.survey <- this.survey[, !drop.column, drop = FALSE]

  # Store the cleaned data frame back under its original name.
  assign(survey.name, this.survey)
}



# ---- Write the final analysis files to the replication data folder ----
write.csv(mp.survey, "../data/mp_withcovariates.csv")
write.csv(c.survey, "../data/c_withcovariates.csv")
write.csv(all.surveys, "../data/all_withcovariates.csv")

write.csv(mp.survey.all, "../data/mp_withcovariates_withattritted.csv")
write.csv(c.survey.all, "../data/c_withcovariates_withattritted.csv")
write.csv(quiz.df, "../data/quiz_with_covariates.csv")

# citizen_survey is not created anywhere in this script. Write it only when
# the calling session provides it, so that a missing object does not abort
# the script before the follow-up survey below is written.
if (exists("citizen_survey")) {
  write.csv(citizen_survey, "../data/citizen_survey.csv")
} else {
  warning("citizen_survey is not defined; ../data/citizen_survey.csv was not written", call. = FALSE)
}

write.csv(followupsurvey, "../data/followupsurvey.csv")


